{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "1e1ca7ca-264b-429e-b7a9-ee31bd1e6e2e",
   "metadata": {},
   "source": [
    "## Atomic scattering factors\n",
    "\n",
    "Source: https://henke.lbl.gov/optical_constants/asf.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eb146023-9199-4927-b15a-53aedc19902a",
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "import pandas as pd\n",
    "from mendeleev import element\n",
    "from mendeleev.db import get_session\n",
    "from mendeleev.models import ScatteringFactor, PropertyMetadata, ValueOrigin"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e3f5828-3b08-4947-bcf7-6ceefd97c8dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory that is searched below for the raw *.nff files; the path is\n",
    "# relative, so it is resolved against the notebook's working directory.\n",
    "root = Path(\"../data/atomic-scattering-factors/\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6a7ca7c6-317e-488e-a566-f6cea3954531",
   "metadata": {},
   "source": [
    "## Preprocess the files\n",
    "\n",
    "- some rows have trailing tabs (\"\\t\") that break parsing\n",
    "- a few files start with a comment or a non-standard header; these have to be corrected manually"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f4a618d-364f-4e1b-9dc2-f24a54ac13cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write whitespace-stripped copies of the raw files into a \"preprocessed\"\n",
    "# subdirectory; the original files are left untouched.\n",
    "proc = root / \"preprocessed\"\n",
    "proc.mkdir(exist_ok=True)\n",
    "\n",
    "for file in root.glob(\"*.nff\"):\n",
    "    # Read the whole file before opening the copy for writing.\n",
    "    with file.open(\"r\") as reader:\n",
    "        raw_lines = reader.readlines()\n",
    "\n",
    "    # Each line loses its surrounding whitespace (including trailing tabs)\n",
    "    # and is terminated by exactly one newline.\n",
    "    target = proc / file.name\n",
    "    with target.open(\"w\") as writer:\n",
    "        writer.writelines(raw.strip() + \"\\n\" for raw in raw_lines)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "810be02e-d7bd-4d51-947f-549f3e270a1d",
   "metadata": {},
   "source": [
    "## Load the data into the db"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cbb5401d-7c1b-476e-aae0-d8381d3dad53",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Session requested with read_only=False: the cells below add rows and commit them.\n",
    "session = get_session(read_only=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "27b8c7ec-7943-446c-ab63-17d3f418cfe5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Columns every preprocessed file must provide.\n",
    "expected_columns = [\"E(eV)\", \"f1\", \"f2\"]\n",
    "\n",
    "for file in sorted(proc.glob(\"*.nff\")):\n",
    "    # The file stem is the element symbol in lower case.\n",
    "    symbol = file.stem.capitalize()\n",
    "    e = element(symbol)\n",
    "    print(symbol, e.atomic_number)\n",
    "\n",
    "    # Skip elements whose rows are already stored so that re-running this\n",
    "    # cell does not insert duplicates. Rows are committed once per file, so\n",
    "    # an element is either fully loaded or absent.\n",
    "    stored_rows = (\n",
    "        session.query(ScatteringFactor)\n",
    "        .filter_by(atomic_number=e.atomic_number)\n",
    "        .count()\n",
    "    )\n",
    "    if stored_rows:\n",
    "        print(f\"  skipped: {stored_rows} rows already in the database\")\n",
    "        continue\n",
    "\n",
    "    df = pd.read_csv(file, sep=\"\\t\", header=0, engine=\"python\", comment=\"#\")\n",
    "\n",
    "    # Fail with a readable message before anything is added to the session\n",
    "    # if the header of this file was not corrected during preprocessing.\n",
    "    missing = [col for col in expected_columns if col not in df.columns]\n",
    "    if missing:\n",
    "        raise ValueError(\n",
    "            f\"{file.name}: missing columns {missing}; found {list(df.columns)}\"\n",
    "        )\n",
    "\n",
    "    # Walk the three columns in parallel instead of using DataFrame.iterrows,\n",
    "    # which builds a new Series for every row.\n",
    "    session.add_all(\n",
    "        [\n",
    "            ScatteringFactor(\n",
    "                atomic_number=e.atomic_number,\n",
    "                energy=energy_ev,\n",
    "                f1=f1_value,\n",
    "                f2=f2_value,\n",
    "            )\n",
    "            for energy_ev, f1_value, f2_value in zip(df[\"E(eV)\"], df[\"f1\"], df[\"f2\"])\n",
    "        ]\n",
    "    )\n",
    "    session.commit()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "89a898e9-e2ae-4eef-88c3-dab163a5dc7d",
   "metadata": {},
   "source": [
    "## Test if the number of rows in the Scattering Factors table is the same as the number of rows in the DF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7c041915-a112-4880-832d-998ed2ad1cea",
   "metadata": {},
   "outputs": [],
   "source": [
    "# (symbol, rows in the database, rows in the file) for every element whose\n",
    "# counts differ.\n",
    "mismatches = []\n",
    "\n",
    "for file in sorted(proc.glob(\"*.nff\")):\n",
    "    symbol = file.stem.capitalize()\n",
    "    e = element(symbol)\n",
    "    print(symbol, e.atomic_number)\n",
    "    df = pd.read_csv(file, sep=\"\\t\", header=0, engine=\"python\", comment=\"#\")\n",
    "    db_rows = session.query(ScatteringFactor).filter_by(atomic_number=e.atomic_number).count()\n",
    "\n",
    "    if db_rows == df.shape[0]:\n",
    "        print(\"OK\")\n",
    "    else:\n",
    "        print(f\"ERROR {db_rows=} {df.shape[0]=}\")\n",
    "        mismatches.append((symbol, db_rows, df.shape[0]))\n",
    "\n",
    "# Fail the cell instead of only printing, so that a \"Run All\" stops here\n",
    "# when the database and the files disagree.\n",
    "assert not mismatches, f\"row count mismatch (symbol, db rows, file rows): {mismatches}\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "45576c65-5042-42a2-8890-48c34ecae2af",
   "metadata": {},
   "source": [
    "## Create PropertyMetadata records"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a08f783-a63f-4ffe-b49c-1c26c21c7d04",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Values shared by the metadata records of all four columns.\n",
    "common = dict(\n",
    "    category=\"electric and optical properties\",\n",
    "    citation_keys=\"atomic_scattering_factors,henke1993xray\",\n",
    "    table_name=\"scattering_factors\",\n",
    "    class_name=\"ScatteringFactor\",\n",
    "    value_origin=ValueOrigin.STORED,\n",
    ")\n",
    "data_note = \"specific data references available at cited data source\"\n",
    "\n",
    "# One entry per column: (attribute/column name, annotations, description, unit).\n",
    "columns = [\n",
    "    (\"atomic_number\", None, \"Atomic number\", None),\n",
    "    (\"energy\", data_note, \"Energy of the incident photon\", \"eV\"),\n",
    "    (\"f1\", data_note, \"Scattering factor f1\", None),\n",
    "    (\"f2\", data_note, \"Scattering factor f2\", None),\n",
    "]\n",
    "\n",
    "for name, annotations, description, unit in columns:\n",
    "    # Skip columns that already have a record so the cell can be re-run\n",
    "    # without creating duplicate metadata.\n",
    "    existing = (\n",
    "        session.query(PropertyMetadata)\n",
    "        .filter_by(table_name=common[\"table_name\"], column_name=name)\n",
    "        .first()\n",
    "    )\n",
    "    if existing is not None:\n",
    "        print(f\"{name}: metadata already present, skipped\")\n",
    "        continue\n",
    "\n",
    "    session.add(\n",
    "        PropertyMetadata(\n",
    "            annotations=annotations,\n",
    "            attribute_name=name,\n",
    "            column_name=name,\n",
    "            description=description,\n",
    "            unit=unit,\n",
    "            **common,\n",
    "        )\n",
    "    )\n",
    "    print(f\"{name}: metadata added\")\n",
    "\n",
    "# Single commit: either all missing records are stored or none.\n",
    "session.commit()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
